##----libraries----
# One call per line: when these were fused onto the header line they were
# part of the comment and nothing was attached.
# NOTE(review): tidyverse already attaches dplyr, tidyr, tibble and ggplot2;
# the explicit calls are kept because they are harmless.
library(tidyverse)
library(tidyquant)
library(broom)
library(umap)
library(dplyr)
library(tidyr)
library(tibble)
library(ggplot2)
library(ggrepel)

##----.read the data----
# Load the serialized S&P 500 prices table from the local course folder.
# NOTE(review): absolute, machine-specific path - adjust when running on
# another machine.
sp_500_prices_tbl <- readRDS(
  "C:/Users/mosta/Desktop/Business Decisions with Machine Learning/1/sp_500_prices_tbl.rds"
)

# Print the table for a quick visual check.
sp_500_prices_tbl
# Load the serialized S&P 500 index table from the local course folder.
# NOTE(review): absolute, machine-specific path - adjust when running on
# another machine.
sp_500_index_tbl <- readRDS(
  "C:/Users/mosta/Desktop/Business Decisions with Machine Learning/1/sp_500_index_tbl.rds"
)

# Print the table for a quick visual check.
sp_500_index_tbl
# Fit K-Means with 4 centers on `stock_date_matrix_tbl` (defined earlier in
# the script). The `symbol` column is dropped first so that it is not passed
# to kmeans(); nstart = 20 tries 20 random initialisations and keeps the best.
kmeans_obj <- stock_date_matrix_tbl %>%
  select(-symbol) %>%
  kmeans(centers = 4, nstart = 20)

# Get the tot.withinss using glance()
glance(kmeans_obj)
##----.step4 (Find the optimal value of K)----

# Fit K-Means on the stock/date matrix for a given number of centers.
#
# Reads `stock_date_matrix_tbl` from the enclosing environment, drops the
# `symbol` column and runs kmeans() with 20 random starts.
#
# Args:
#   center:   number of cluster centers passed to kmeans(centers = ).
#   iter.max: maximum number of iterations passed to kmeans(). Defaults to
#             10, which is also kmeans()'s own default, so existing calls
#             behave as before. Raise it if kmeans() warns that it
#             "did not converge in 10 iterations".
#
# Returns:
#   The fitted object returned by kmeans().
kmeans_mapper <- function(center = 4, iter.max = 10) {
  stock_date_matrix_tbl %>%
    select(-symbol) %>%
    kmeans(centers = center, nstart = 20, iter.max = iter.max)
}

# Smoke test: fit with 4 centers and summarise the fit with glance().
4 %>%
  kmeans_mapper() %>%
  glance()
#> Warning: There was 1 warning in `mutate()`.
#> ℹ In argument: `k_means = centers %>% map(kmeans_mapper)`.
#> Caused by warning:
#> ! did not converge in 10 iterations
# Scree Plot
# Expand the nested `glance` column of `kmeans_mapped_tbl` (built earlier in
# the script), keep the number of centers and the total within-cluster sum of
# squares, and plot one against the other.
kmeans_mapped_tbl %>%
  unnest(glance) %>%
  select(centers, tot.withinss) %>%
  ggplot(aes(centers, tot.withinss)) +
  geom_point(color = "#2DC6D6", size = 4) +
  # `linewidth` replaces `size` for lines: using `size` here was deprecated
  # in ggplot2 3.4.0 and produced a deprecation warning.
  geom_line(color = "#2DC6D6", linewidth = 1) +
  ggrepel::geom_label_repel(
    aes(label = centers),
    color = "#2DC6D6",
    max.overlaps = 20
  ) +
  labs(
    title = "Scree Plot",
    subtitle = "Measures the distance each of the symbols are from the closes K-Means center",
    caption = "Conclusion: Based on the Scree Plot, We can see that the Scree Plot becomes linear (constant rate of change) between 5 and 10 centers for K."
  )